Morning data for 2020-04-07

Disclaimers:

  • US data is only provided by state, not aggregated
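  • growth data represents day-over-day growth as a fraction of the average of the two days' totals, thus if there were 100 cases yesterday and 120 today, growth will be about 0.18 (20 / 110), slightly below the naive 20 / 100 = 0.2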
  • growth data is smoothed with a 7-day moving average, and is only plotted for locales that have reached both > 100 cases and > 10 deaths
  • recovered numbers are not accounted for at all. This can result in case growth rate seeming smaller than it actually is (because all cases are assumed to be active)
  • growth rate for deaths might be better calculated as a fraction of total cases 2-3 weeks earlier, rather than as a fraction of total deaths. So take that data for what it is intended to be: a general indicator, not a rigorous statistic
In [2]:
import requests
from matplotlib import pyplot as plt
import csv
from datetime import datetime
from datetime import timedelta, date

states_url = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-states.csv'
cases_url = "https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
deaths_url = "https://github.com/CSSEGISandData/COVID-19/raw/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv"


def fetch_csv_rows(url):
    """
    url::address of a comma-separated text file

    returns
    list of rows, each row a list of strings (the header row, if the
    file has one, is the first element)

    raises requests.HTTPError when the server answers with an error status
    """
    with requests.get(url, stream=True, timeout=30) as response:
        # Fail loudly on an HTTP error instead of parsing an error page
        # as if it were CSV data.
        response.raise_for_status()
        # iter_lines() only decodes to str when the response carries an
        # encoding; without one it yields bytes, which csv.reader rejects.
        if response.encoding is None:
            response.encoding = 'utf-8'
        lines = response.iter_lines(decode_unicode=True)
        return list(csv.reader(lines, delimiter=','))


states_rows = fetch_csv_rows(states_url)
cases_rows = fetch_csv_rows(cases_url)
deaths_rows = fetch_csv_rows(deaths_url)

def get_data(cases_rows, deaths_rows, states_rows):
    """
    Build per-locale date, case and death series from the Johns Hopkins
    (global) rows and the NYT (US state) rows.

    cases_rows::rows of cases for each province, country in Johns Hopkins
        dataset; row 0 is the header, columns 0 and 1 are province and
        country, and the date columns start at index 4
    deaths_rows::rows of deaths for each province, country in Johns Hopkins
        dataset, same layout as cases_rows
    states_rows::rows from NYT dataset; row 0 is the header, columns used
        are 0 (date), 1 (state), 3 (cases) and 4 (deaths)

    returns (dates, cases, deaths, locales)
    dates[locale]::array of dates for which we have data for a locale
    cases[locale]::array of cases for each date in dates
    deaths[locale]::array of deaths for each date in dates
    locales::set of every locale found in the three inputs
    where locale is a tuple in the format of (province, country); the
    province is '' for country-level rows and NYT states are stored as
    (state, 'US')

    US state series start at the first Johns Hopkins date with a value of
    zero, then carry the previous day's value forward on any day the NYT
    data has no row for that state.
    """
    jhu_date_format = "%m/%d/%y"
    nyt_date_format = "%Y-%m-%d"

    cases_header = cases_rows[0]
    deaths_header = deaths_rows[0]
    assert len(cases_header)==len(deaths_header), "cases and deaths not of equal length"

    # Skip the NYT header row; treating it as data used to create a bogus
    # ('state', 'US') locale.
    state_rows = states_rows[1:]

    locales = {(row[0], row[1]) for row in cases_rows[1:]}
    locales |= {(row[0], row[1]) for row in deaths_rows[1:]}
    locales |= {(row[1], 'US') for row in state_rows}

    jhu_dates = [datetime.strptime(item, jhu_date_format).date()
                 for item in cases_header[4:]]
    # Every locale gets its own copy of the date list so that replacing
    # or extending one locale's dates never affects another.
    dates = {locale: list(jhu_dates) for locale in locales}
    cases = {locale: [] for locale in locales}
    deaths = {locale: [] for locale in locales}

    for row in cases_rows[1:]:
        cases[(row[0], row[1])].extend(int(item) for item in row[4:])
    for row in deaths_rows[1:]:
        deaths[(row[0], row[1])].extend(int(item) for item in row[4:])

    # '!=' rather than 'is not': identity comparison against a string
    # literal is implementation-dependent.
    us_state_locales = {locale for locale in locales
                        if locale[1] == 'US' and locale[0] != ''}
    if not us_state_locales:
        return dates, cases, deaths, locales

    # create date zero, and set all values to zero
    first_date = jhu_dates[0]
    for state in us_state_locales:
        dates[state] = [first_date]
        cases[state] = [0]
        deaths[state] = [0]

    # Index the NYT rows by date once instead of re-parsing every row
    # for every day.
    rows_by_date = {}
    for row in state_rows:
        row_date = datetime.strptime(row[0], nyt_date_format).date()
        rows_by_date.setdefault(row_date, []).append(row)
    last_date = max(rows_by_date, default=first_date)

    # Walk every day after date zero up to and including the last NYT date.
    for offset in range(1, (last_date - first_date).days + 1):
        curr_day = first_date + timedelta(days=offset)
        # new date.  Copy over last date's data
        for state in us_state_locales:
            dates[state].append(curr_day)
            cases[state].append(cases[state][-1])
            deaths[state].append(deaths[state][-1])
        # overwrite the copied values for every state that reported on
        # this day
        for row in rows_by_date.get(curr_day, ()):
            locale = (row[1], 'US')
            cases[locale][-1] = int(row[3])
            deaths[locale][-1] = int(row[4])
    return dates, cases, deaths, locales
    
# Module-level series keyed by (province, country) locale; the plotting
# functions and the final output loop below read these globals directly.
dates, cases, deaths, locales = get_data(cases_rows, deaths_rows, states_rows)

def discrete_growth_fraction(data):
    """
    data::array of cumulative totals, one entry per day

    returns
    array with one entry per consecutive pair of days (one shorter than
    data), each entry being the day-over-day change divided by the
    average of the two days' totals.  The entry is 0 when both days sum
    to zero or when the later day is below 10.
    """
    def step_growth(prev, curr):
        total = prev + curr
        # the zero-sum test guards the division; the < 10 test ignores
        # very early growth
        if total == 0 or curr < 10:
            return 0
        return (curr - prev) * 2 / total

    return [step_growth(prev, curr) for prev, curr in zip(data, data[1:])]

def plot_growth_fraction(locale, plot_type):
    """
    Show a single plot of the unsmoothed daily growth fraction for one
    locale.

    locale::location to track in (province, country) form
    plot_type::'cases' or 'deaths'; any other value draws no line
    """
    series = None
    y_label = None
    if plot_type == 'cases':
        series = cases[locale]
        y_label = 'cases growth fraction'
    elif plot_type == 'deaths':
        series = deaths[locale]
        y_label = 'deaths growth fraction'

    if series is not None:
        plt.plot(discrete_growth_fraction(series))
        plt.ylabel(y_label)

    # the title is the country part of the locale
    plt.title(locale[1])
    plt.xlabel('sample (day)')
    plt.show()

def moving_average(data, window):
    """
    data::array to be averaged
    window::integer length to average over

    returns
    array of the means of every run of window consecutive entries, so it
    has len(data) - window + 1 entries (empty when window exceeds
    len(data))
    """
    n_windows = len(data) - window + 1
    return [sum(data[start:start + window]) / window
            for start in range(n_windows)]

# quick sanity checks, run when the cell executes
assert moving_average([1, 3, 5, 7], 1) == [1, 3, 5, 7]
assert moving_average([1, 3, 5, 7], 2) == [2, 4, 6]
assert moving_average([1], 1) == [1]

def plot_growth_with_total(locale, window = 1):
    """
    Show one figure for a locale with smoothed growth on the left axis
    and cumulative totals (log scale) on the right axis, both against
    date.

    locale::in format ('province','country'), leave province '' if none
    window::moving window to average over
    """
    totals = {'cases': cases[locale], 'deaths': deaths[locale]}
    days = dates[locale]

    # Smooth the growth, then pad the front with zeros so the smoothed
    # series is as long as the totals it is plotted next to.
    smoothed = {}
    for kind, series in totals.items():
        averaged = moving_average(discrete_growth_fraction(series), window)
        smoothed[kind] = [0] * window + averaged

    fig, growth_axis = plt.subplots()

    province, country = locale[0], locale[1]
    heading = country if province == '' else province + ', ' + country
    growth_axis.set_title(heading)
    growth_axis.set_xlabel('day')
    growth_axis.set_ylabel('growth', color='tab:blue')
    for kind, line_color in (('cases', 'tab:green'), ('deaths', 'tab:blue')):
        growth_axis.plot(days, smoothed[kind], color=line_color,
                         label=kind + ' growth')
    growth_axis.tick_params(axis='y', labelcolor='tab:blue')
    growth_axis.legend(loc='upper left')
    growth_axis.grid(axis='y')
    growth_axis.set_yticks([0,0.05,0.1,0.15,0.2,0.25,0.3,0.35,0.4,0.45,0.5])

    # second y-axis sharing the same x-axis; the x-label was set above
    total_axis = growth_axis.twinx()
    total_axis.set_ylabel('total')
    total_axis.set_yscale('log')
    for kind, line_color in (('cases', 'tab:orange'), ('deaths', 'tab:red')):
        total_axis.plot(days, totals[kind], color=line_color,
                        label=kind + ' totals')
    total_axis.tick_params(axis='y', labelcolor='tab:red')
    total_axis.legend(loc='center left')

    fig.tight_layout()  # otherwise the right y-label is slightly clipped
    plt.show()
    
# final output section
# Sort by (country, province).  A tuple key keeps each country's rows
# together; concatenating the two strings could interleave countries when
# one country's name is a prefix of another's.
sorted_locales = sorted(locales, key=lambda x: (x[1], x[0]))
for locale in sorted_locales:
    if locale[0] != '':
        print(locale[0] + ", " + locale[1])
    else:
        print(locale[1])
    # Only plot locales past the early-outbreak noise.  default=0 makes a
    # locale with an empty series get skipped instead of raising
    # ValueError from max().
    if max(cases[locale], default=0) > 100 and max(deaths[locale], default=0) > 10:
        plot_growth_with_total(locale, window = 7)
Afghanistan
Albania
Algeria
Andorra
Angola
Antigua and Barbuda
Argentina
Armenia
Australian Capital Territory, Australia
New South Wales, Australia
Northern Territory, Australia
Queensland, Australia
South Australia, Australia
Tasmania, Australia
Victoria, Australia
Western Australia, Australia
Austria
Azerbaijan
Bahamas
Bahrain
Bangladesh
Barbados
Belarus
Belgium
Belize
Benin
Bhutan
Bolivia
Bosnia and Herzegovina
Botswana
Brazil
Brunei
Bulgaria
Burkina Faso
Burma
Burundi
Cabo Verde
Cambodia
Cameroon
Alberta, Canada
British Columbia, Canada
Diamond Princess, Canada
Grand Princess, Canada
Manitoba, Canada
New Brunswick, Canada
Newfoundland and Labrador, Canada
Northwest Territories, Canada
Nova Scotia, Canada
Ontario, Canada
Prince Edward Island, Canada
Quebec, Canada
Recovered, Canada
Saskatchewan, Canada
Yukon, Canada
Central African Republic
Chad
Chile
Anhui, China
Beijing, China
Chongqing, China
Fujian, China
Gansu, China
Guangdong, China
Guangxi, China
Guizhou, China
Hainan, China
Hebei, China
Heilongjiang, China
Henan, China
Hong Kong, China
Hubei, China
Hunan, China
Inner Mongolia, China
Jiangsu, China
Jiangxi, China
Jilin, China
Liaoning, China
Macau, China
Ningxia, China
Qinghai, China
Shaanxi, China
Shandong, China
Shanghai, China
Shanxi, China
Sichuan, China
Tianjin, China
Tibet, China
Xinjiang, China
Yunnan, China
Zhejiang, China
Colombia
Congo (Brazzaville)
Congo (Kinshasa)
Costa Rica
Cote d'Ivoire
Croatia
Cuba
Cyprus
Czechia
Denmark
Faroe Islands, Denmark
Greenland, Denmark
Diamond Princess
Djibouti
Dominica
Dominican Republic
Ecuador
Egypt
El Salvador
Equatorial Guinea
Eritrea
Estonia
Eswatini
Ethiopia
Fiji
Finland
France
French Guiana, France
French Polynesia, France
Guadeloupe, France
Martinique, France
Mayotte, France
New Caledonia, France
Reunion, France
Saint Barthelemy, France
Saint Pierre and Miquelon, France
St Martin, France
Gabon
Gambia
Georgia
Germany
Ghana
Greece
Grenada
Guatemala
Guinea
Guinea-Bissau
Guyana
Haiti
Holy See
Honduras
Hungary
Iceland
India
Indonesia
Iran
Iraq
Ireland
Israel
Italy
Jamaica
Japan
Jordan
Kazakhstan
Kenya
Korea, South
Kosovo
Kuwait
Kyrgyzstan
Laos
Latvia
Lebanon
Liberia
Libya
Liechtenstein
Lithuania
Luxembourg
MS Zaandam
Madagascar
Malawi
Malaysia
Maldives
Mali
Malta
Mauritania
Mauritius
Mexico
Moldova
Monaco
Mongolia
Montenegro
Morocco
Mozambique
Namibia
Nepal
Netherlands
Aruba, Netherlands
Bonaire, Sint Eustatius and Saba, Netherlands
Curacao, Netherlands
Sint Maarten, Netherlands
New Zealand
Nicaragua
Niger
Nigeria
North Macedonia
Norway
Oman
Pakistan
Panama
Papua New Guinea
Paraguay
Peru
Philippines
Poland
Portugal
Qatar
Romania
Russia
Rwanda
Saint Kitts and Nevis
Saint Lucia
Saint Vincent and the Grenadines
San Marino
Sao Tome and Principe
Saudi Arabia
Senegal
Serbia
Seychelles
Sierra Leone
Singapore
Slovakia
Slovenia
Somalia
South Africa
South Sudan
Spain
Sri Lanka
Sudan
Suriname
Sweden
Switzerland
Syria
Taiwan*
Tanzania
Thailand
Timor-Leste
Togo
Trinidad and Tobago
Tunisia
Turkey
US
Alabama, US
Alaska, US
Arizona, US
Arkansas, US
California, US
Colorado, US
Connecticut, US
Delaware, US
District of Columbia, US
Florida, US
Georgia, US
Guam, US
Hawaii, US
Idaho, US
Illinois, US
Indiana, US
Iowa, US
Kansas, US
Kentucky, US
Louisiana, US
Maine, US
Maryland, US
Massachusetts, US
Michigan, US
Minnesota, US
Mississippi, US
Missouri, US
Montana, US
Nebraska, US
Nevada, US
New Hampshire, US
New Jersey, US
New Mexico, US
New York, US
North Carolina, US
North Dakota, US
Northern Mariana Islands, US
Ohio, US
Oklahoma, US
Oregon, US
Pennsylvania, US
Puerto Rico, US
Rhode Island, US
South Carolina, US
South Dakota, US
Tennessee, US
Texas, US
Utah, US
Vermont, US
Virgin Islands, US
Virginia, US
Washington, US
West Virginia, US
Wisconsin, US
Wyoming, US
state, US
Uganda
Ukraine
United Arab Emirates
United Kingdom
Anguilla, United Kingdom
Bermuda, United Kingdom
British Virgin Islands, United Kingdom
Cayman Islands, United Kingdom
Channel Islands, United Kingdom
Falkland Islands (Islas Malvinas), United Kingdom
Gibraltar, United Kingdom
Isle of Man, United Kingdom
Montserrat, United Kingdom
Turks and Caicos Islands, United Kingdom
Uruguay
Uzbekistan
Venezuela
Vietnam
West Bank and Gaza
Western Sahara
Zambia
Zimbabwe